{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第四步：再次直接调用xgboost内嵌的cv寻找最佳的参数n_estimators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3795</td>\n",
       "      <td>1897.500000</td>\n",
       "      <td>1897.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5500</td>\n",
       "      <td>1833.333333</td>\n",
       "      <td>1375.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3100</td>\n",
       "      <td>1550.000000</td>\n",
       "      <td>1033.333333</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3750</td>\n",
       "      <td>1875.000000</td>\n",
       "      <td>1875.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>5</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>7500</td>\n",
       "      <td>2500.000000</td>\n",
       "      <td>2500.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>30</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 227 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0        1.0         1   3795      1897.500000     1897.500000        0.0   \n",
       "1        2.0         3   5500      1833.333333     1375.000000       -1.0   \n",
       "2        1.0         2   3100      1550.000000     1033.333333       -1.0   \n",
       "3        1.0         1   3750      1875.000000     1875.000000        0.0   \n",
       "4        2.0         2   7500      2500.000000     2500.000000        0.0   \n",
       "\n",
       "   room_num  Year  Month  Day  ...   virtual  walk  walls  war  washer  water  \\\n",
       "0       2.0  2016      6   28  ...         0     0      0    0       0      0   \n",
       "1       5.0  2016      6    4  ...         0     0      0    0       0      0   \n",
       "2       3.0  2016      6    3  ...         0     0      0    1       0      0   \n",
       "3       2.0  2016      5   21  ...         0     0      0    0       0      0   \n",
       "4       4.0  2016      4   30  ...         0     0      0    0       0      0   \n",
       "\n",
       "   wheelchair  wifi  windows  work  \n",
       "0           0     0        0     0  \n",
       "1           0     0        0     0  \n",
       "2           0     0        0     0  \n",
       "3           0     0        0     0  \n",
       "4           0     0        0     0  \n",
       "\n",
       "[5 rows x 227 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(\"train_data.csv\")\n",
    "train_y = data['interest_level']\n",
    "train_x = data.drop([\"interest_level\"], axis=1)\n",
    "\n",
    "train_x.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "改小此时学习率为0.02，调整弱分类数目\n",
    "\n",
    "此前已经调好的参数： n_estimators：193 max_depth：6 min_child_weight：3 reg_alpha：0 reg_lambda：1，subsample=0.7,colsample_bytree=0.8,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of train is: 0.4531389130494976\n"
     ]
    }
   ],
   "source": [
    "# 设置xgboost参数（sklearn框架下）\n",
    "xgb4 = XGBClassifier(\n",
    "        learning_rate=0.02,\n",
    "        n_estimators=2000,  #这次调小了学习率，要适当增大n_estimators的最大值\n",
    "        max_depth=6,\n",
    "        min_child_weight=3,\n",
    "        gamma=0,\n",
    "        subsample=0.7,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel=0.7,\n",
    "        objective='multi:softprob',\n",
    "        seed=3\n",
    "        )\n",
    "\n",
    "#直接调用xgboost内嵌的交叉验证（cv），可对连续的n_estimators参数进行快速交叉验证\n",
    "#而GridSearchCV只能对有限个参数进行交叉验证\n",
    "def modelfit(alg, x_train, y_train, cv_folds=5, early_stopping_rounds=10):\n",
    "    \n",
    "    xgb_param = alg.get_xgb_params()\n",
    "    xgb_param['num_class'] = 3\n",
    "    \n",
    "    #直接调用xgboost，而非sklarn的wrapper类\n",
    "    xgtrain = xgb.DMatrix(x_train, label=y_train)\n",
    "    \n",
    "    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_xgb_params()['n_estimators'], nfold=cv_folds,\n",
    "                     metrics='mlogloss', early_stopping_rounds=early_stopping_rounds)\n",
    "    \n",
    "    cvresult.to_csv('2_nestimators.csv', index_label = 'n_estimators')\n",
    "    \n",
    "    #最佳参数n_estimators\n",
    "    n_estimators = cvresult.shape[0]\n",
    "    \n",
    "    # 采用交叉验证得到的最佳参数n_estimators，训练模型\n",
    "    alg.set_params(n_estimators=n_estimators)\n",
    "    alg.fit(x_train, y_train, eval_metric='mlogloss')\n",
    "    \n",
    "    #Predict training set:\n",
    "    train_predprob = alg.predict_proba(x_train)\n",
    "    logloss = log_loss(y_train, train_predprob)\n",
    "\n",
    "   #Print model report:\n",
    "    print ('logloss of train is:', logloss)\n",
    "    \n",
    "modelfit(xgb4, train_x, train_y)    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'base_score': 0.5,\n",
       " 'booster': 'gbtree',\n",
       " 'colsample_bylevel': 0.7,\n",
       " 'colsample_bytree': 0.8,\n",
       " 'gamma': 0,\n",
       " 'learning_rate': 0.02,\n",
       " 'max_delta_step': 0,\n",
       " 'max_depth': 6,\n",
       " 'min_child_weight': 3,\n",
       " 'missing': None,\n",
       " 'n_estimators': 860,\n",
       " 'nthread': 1,\n",
       " 'objective': 'multi:softprob',\n",
       " 'reg_alpha': 0,\n",
       " 'reg_lambda': 1,\n",
       " 'scale_pos_weight': 1,\n",
       " 'seed': 3,\n",
       " 'silent': 1,\n",
       " 'subsample': 0.7}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb4.get_xgb_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl8XHW9//HXJ0nT0DZdk9Il3SmlC9BCKRQQKogUkLIKVMALKhWvqKBeL1y9iigKXEXZRBGBHyK7iAgoKrvK0hYo0NZC6ZruS9J9S/L5/fE9SabpJJm0mZyZzPv5eJxH5pzznXM+c2Yynznf7/d8j7k7IiIiAHlxByAiIplDSUFEROooKYiISB0lBRERqaOkICIidZQURESkjpKCSAIz+x8zuzvuOETioqSQZcysi5ktMrPPJCwrNrMlZnZuwrLxZva0mVWYWaWZzTGz682sR7T+EjOrNrPN0bTAzL6U5tgnmVl5OvfREsnicfcfufsX0rS/RWb2iXRsOx3a6v3KtuPS3ikpZBl33wxMA24xs9Jo8U3ADHd/HMDMjgZeAv4JHOTu3YHJQBVwaMLmXnP3Lu7eBTgXuMnMxrXNK5GWMLOCuGOQHOHumrJwAu4DHgImAeuAvgnr/gHc1szzLwH+0WDZm8BnEuanALOBSkKSGZmwbmS0rDIqMyVh3anAHGATsAz4JtAZ2AbUAJujqV8jr+sO4Jno+W8Aw1I4HgcBfwPWA/OA8/YmHuBa4IHoeYMBBy4FlgIVwOXAEcC70Wu/PWE/w4AXovdjLfA7oHu07rfRvrZF+/pWCsd4EfDf0b52AAXR/LLotcwDTkxyLI4CVgL5CcvOAt6NHk8AZgAbgVXAzY0c00lAeSPrugH3A2uAxcB3gLxoXT7w0+gYLASuiI5jQSPbWgR8opF1lwHzo/f1qdrPDGDAz4DVwIboGI1p7P2O+/81m6bYA9C0l28c9ABWRP94lyYs7wxUA5Oaef4lJCSF6IuuEjgwmj8Q2AKcBHQAvhX9cxZG8/OB/4nmT4j+AUdEz10BfCwhzsOix41+ySTEcV/0BTAh+hL8HfBwM8/pTPjSvjR6zmHRcRnd0nhInhR+CRQBnwS2A08CvYH+0ZfS8VH5A6Lj1REoBV4Bfp6w7d2+/Jo6xgnl3wEGAPsBI6LX2S8hvqQJE/gIOClh/jHg6ujxa8DF0eMuwFGNbKPR94uQEP4IFEdxfAB8Plp3OeFLuSw63n9nL5JC9LlaG72fHYHbgFeidScDM4HuhAQxkuiHUWPvt6bUJlUfZSl3ryD8wuwEPJGwqgehWnBl7QIzuylqV9hiZt9JKHtUtHwz4Szht8CH0brzgWfc/W/uvgv4CeGL6WjCL9EuwA3uvtPdXwCeBqZGz90FjDKzru5e4e5vtfDlPeHub7p7FSEpjG2m/KeARe5+r7tXRfv7PaFKrDXi+YG7b3f3vxK+xB9y99Xuvgx4FRgH4O7zo+O1w93XADcDxzex3aaOca1b3X2pu28jJPuO0Wvp4O6L3P2jRrb9ENH7YWbFhF/PDyUcjwPMrMTdN7v76y05GGaWH8V+jbtvcvdFhDODi6Mi5wG3uHt59Dm9oSXbT3AhcI+7v+XuO4BrgIlmNjh6DcWEM0Rz97nuviLh9e3L+53TlBSylJldRPiF9nfgxoRVFYRqir61C9z9Wx7aFf5A+CVd63V37+6hTaEPMBr4UbSuH6FaoHYbNYRfqf2jdUujZbUWR+sAziF8CS02s5fNbGILX97KhMdbCQmoKYOAI6MEV2lmlYQvlD6tFM+qhMfbksx3ATCz3mb2sJktM7ONwANASRPbbeoY11qasH4+cCXhbGZ1tK9+jWz7QeBsM+sInA285e61+/o84Szl32Y23cw+1USMyZQQzhAXJyxLfP/7Jcbd4HFLNDw+mwlVc/2jHyK3E6oaV5nZXWbWNSq6r+93TlNSyEJm
1ptQn3oZ8EXgPDM7DsDdtxDq4c9uyTbdfRXh1/Xp0aLlhC/b2n0aoRpjWbRugJklfn4GRutw9+nufgahiuVJ4NHa3bQkphZYCrwcJbjaqYu7f6mN4/lxtM1D3L0rcBGhaqNWw/01dYyTPsfdH3T3Y6PnObv/IEgsN4fwhXoK8BlCkqhd96G7TyUcjxuBx82sc+ovk7WEX+ODEpbVvf+E6puyhHUDWrDtRA2PT2egF/Wfs1vd/XDCj5kDgf+Kljf2fksKlBSy0+3Ak+7+YnTK/C3g19GvQqL5z5nZ1VECwczKgCGNbdDMehEaI2dHix4FTjOzE82sA/ANQmPnvwhJZwvwLTPrYGaTCMnkYTMrNLMLzaxbVCWykVDtAeEXdi8z69ZKx6HW08CBZnZxFE8HMzvCzEa2cTzFhEbkSjPrT/QllWAVMDRhvqljvAczG2FmJ0Tv83bCWUp1srKRB4GvAscR2hRqt3ORmZVGZyaV0eJGt2NmRYkT4Uz0UeD6qDv0IODrhDOj2tf1NTPrb2bdCY3jzenQYD8FUfyXmtnY6DX/CHjD3RdF7++R0XHbEh2P6mbeb0lF3I0amlo2AWcSfkF1b7D8eeD6hPkjgWcJ//SVwPvA9UCvaP0lhH+W2p43qwl1zr0TtnEWocFwA/AyUcNttG50tGxDVOasaHkh8BdCNdZGYDpwbMLz7iFUAVTSeO+jHybMT6KZxumo3AhCj6U10fZfILRFtCgekjc0FySULyehEZ/wRfidhGMyMzqe7xC+5MsTyp4BLIn29c0UjvEidm+YPoTQ9rOJ0Bj/dLJjmFB+IOEL/JkGyx+I3u/NhB8BZzby/EnR6284HUBou3ogOt5Lge9S3/uogHAmu47Q++gqwpmFNbKfRUn28cNo3eWERvPa11sWLT+R0ONoM/U9vbo0935ran6y6ACLiKSFmZ0C/NLdBzVbWGKn6iMRaVVmtp+ZnWpmBVE12vcInRwkC+hMQbKCmX0M+HOydR56T0mGMLNOhKqwgwjtHs8AX3P3jbEGJilRUhARkTqqPhIRkTpZN8hWSUmJDx48OO4wRESyysyZM9e6e2lz5bIuKQwePJgZM2bEHYaISFYxs8XNl1L1kYiIJEhbUjCze8xstZm938j6g8zsNTPbYWbfTFccIiKSunSeKdxHuLFLY9YTLsH/SRpjEBGRFkhbUnD3Vwhf/I2tX+3u0wmXv4uISAbIijYFM5tmZjPMbMaaNWviDkdEpN3KiqTg7ne5+3h3H19a2myPKhER2UtZkRRERKRt5ExSWL5sCa888yCbN2v4FRGRxqTt4jUze4gwHnuJmZUTRkrsAODuvzSzPsAMoCtQY2ZXAqPSNWjWynef57jpV/LRoGF0GXNkOnYhIpL10pYUPNzur6n1K9n9ln1p1alX2NXmNeWE+8+IiEhDOVN91H3/cJvYbevLY45ERCRz5UxS6NVnIADVG1bEHImISObKmaTQoWMnKulC3uaVcYciIpKxciYpAFTm96Jw2+q4wxARyVg5lRQ2F5bQeaeuiBYRaUxOJYUF27vRrWpt3GGIiGSsnEoKfcsGU0ol23fsjDsUEZGMlFNJIa9bPwqshrWrlscdiohIRsqppFDUoz8AlauXxByJiEhmyqmkUFwarmresnZpzJGIiGSmnEoK3fcfBMDOClUfiYgkk1NJobikHzVu1GzUVc0iIsnkVFKw/A5UUEz+ynfiDkVEJCOlbZTUTLWhsDf7WX7cYYiIZKScOlMA2FpYSvEuXdUsIpJMziWFqs7706tmHTU1HncoIiIZJ+eSAt3K6GUbWV2xIe5IREQyTs4lhY69wn0V1iz7KOZIREQyT84lheL9hwBw/59fjTkSEZHMk3NJoaT/cABOGVAdcyQiIpkn55JCUa8yqsnDKzXUhYhIQ2lLCmZ2j5mtNrP3G1lvZnarmc03s3fN7LB0xbKb/A6s9h5sXr2wTXYnIpJN0nmmcB8wuYn1pwDDo2kacGcaY9nN
1v36MDB/XVvtTkQka6QtKbj7K8D6JoqcAdzvwetAdzPrm654Em3v1J9eVatw17UKIiKJ4mxT6A8kVuyXR8v2YGbTzGyGmc1Ys2bfr0b2bv3pwzrWbtq+z9sSEWlP4kwKlmRZ0p/u7n6Xu4939/GlpaX7vOOCnoMotGpWLl+8z9sSEWlP4kwK5cCAhPkyoE1udNCld7hW4a6nXm6L3YmIZI04k8JTwGejXkhHARvcvU1udNCz/zAApgzWtQoiIonSNnS2mT0ETAJKzKwc+B7QAcDdfwk8C5wKzAe2ApemK5aGOpWEO7BV61oFEZHdpC0puPvUZtY78OV07b9JRV3Z4J2pXK7xj0REEuXcFc21NnXcn0G6VkFEZDc5mxS2dR5AadUKqqpr4g5FRCRj5GxSeGtzDwawmmUVW+IORUQkY+RsUjjy8PF0tF0sX7og7lBERDJGziaF7v0PBODhv+haBRGRWjmbFLpFSeGE/bfGHImISObI2aRg3QZQRT75lRpCW0SkVs4mBfILWEZv8isXxR2JiEjGyN2kANTkdWQAK9mlbqkiIkCOJ4VZ+aMYaKtYtl7tCiIikONJ4bCxh9PVtlG+ojzuUEREMkJOJ4XuZaEH0gPPqFuqiAjkeFIo7jscgBP33xxzJCIimSGnk4L1GEINBkteizsUEZGMkNNJgQ5FrKCUomqdKYiIQK4nBaCm13CG2XLWbd4RdygiIrHL+aTw5qZShtpyPli5Me5QRERil/NJ4RPHfYwi28XKxfPiDkVEJHY5nxS6DhgFwMv/+lfMkYiIxC/nk4KVHgTAYZ1XxxyJiEj8cj4p0KknFXSlY8X8uCMREYldWpOCmU02s3lmNt/Mrk6yfpCZPW9m75rZS2ZWls54GrO9+zCG2HLWb9kZx+5FRDJG2pKCmeUDdwCnAKOAqWY2qkGxnwD3u/shwHXAj9MVT1N8+0YOsGVc+OvX49i9iEjGSOeZwgRgvrsvcPedwMPAGQ3KjAKejx6/mGR9myg+8rP0tM1cdEiXOHYvIpIx0pkU+gNLE+bLo2WJZgHnRI/PAorNrFcaY0qquGwMAC/989W23rWISEZJZ1KwJMu8wfw3gePN7G3geGAZULXHhsymmdkMM5uxZs2a1o90/1CrNbZweetvW0Qki6QzKZQDAxLmy4DdvnXdfbm7n+3u44BvR8s2NNyQu9/l7uPdfXxpaWnrR1rclw10ocemD9hZpbuwiUjuSmdSmA4MN7MhZlYIXAA8lVjAzErMrDaGa4B70hhP48zYVTqakXlL+GDVplhCEBHJBGlLCu5eBVwBPAfMBR5199lmdp2ZTYmKTQLmmdkHwP7A9emKpzkd+o5hhC3lqw/OjCsEEZHYFaRz4+7+LPBsg2XfTXj8OPB4OmNIVfGgseS9+xvOHLQr7lBERGKjK5ojeX0PBmDhbF2rICK5S0mhVulBVJPHkOpF1NQ07CQlIpIblBRqddiPDXThIFvMgrW6E5uI5CYlhQQdDpjEQbaEafersVlEclNaG5qzTec1syjOW8PHBxfGHYqISCx0ppAg7/SfAbDgXd1wR0Ryk5JCon7jABhR/SHbdlbHHIyISNtTUkjUqSfL8/pwcN4C3lu2x2gbIiLtnpJCA72GH8khtpBvPvZO3KGIiLQ5JYUGOg4cz4C8NRRsWxd3KCIibU5JoaGoXWHwrg9x10VsIpJblBQa6nsoDoz2j1i0bmvc0YiItCklhYaKurKr+wEckreAz933ZtzRiIi0KSWFJDoMGs+4vPls3KoRU0UktygpJGEDjqLENtJzR7naFUQkpygpJDNwIgCH8m/KK7bFHIyISNtRUkim5EA2WRfG2zxeX6CuqSKSO5pNCmY2zMw6Ro8nmdlXzax7+kOLUV4eXQ44hiPyP+CGP/877mhERNpMKmcKvweqzewA4DfAEODBtEaVAWzgUQyz5eRvW6eb7ohIzkglKdS4exVwFvBzd78K6JvesDJAXbvCPE6//R8xByMi
0jZSSQq7zGwq8B/A09GyDukLKUP0G4fnF3JE3jw+XK07sYlIbkglKVwKTASud/eFZjYEeCC9YWWADkVY2QSOzZ9NYb7a40UkNzT7befuc9z9q+7+kJn1AIrd/YZUNm5mk81snpnNN7Ork6wfaGYvmtnbZvaumZ26F68hfYZOYpQtosOO9VRu3Rl3NCIiaZdK76OXzKyrmfUEZgH3mtnNKTwvH7gDOAUYBUw1s1ENin0HeNTdxwEXAL9o6QtIq6GTAJiYN4dz79Td2ESk/UulXqSbu28EzgbudffDgU+k8LwJwHx3X+DuO4GHgTMalHGga+1+gOWphd1G+o3DO3bl2Lz3NTieiOSEVJJCgZn1Bc6jvqE5Ff2BpQnz5dGyRNcCF5lZOfAs8JVkGzKzaWY2w8xmrFmzpgUh7KP8Amzwxzg2/32qapxd1TVtt28RkRikkhSuA54DPnL36WY2FPgwhedZkmUNO/xPBe5z9zLgVOC3ZrZHTO5+l7uPd/fxpaWlKey6FQ09noG2mgG2ik/d+mrb7ltEpI2l0tD8mLsf4u5fiuYXuPs5KWy7HBiQMF/GntVDnwcejbb7GlAElKQSeJsZOgmAj+fNYsHaLbGGIiKSbqk0NJeZ2R/MbLWZrTKz35tZWQrbng4MN7MhZlZIaEh+qkGZJcCJ0X5GEpJCG9YPpaDkQCjuy8T8uVRVqwpJRNq3VKqP7iV8mfcjtAn8KVrWpOgq6CsIVU9zCb2MZpvZdWY2JSr2DeAyM5sFPARc4pk2VrUZDD+J4/LepYAqTr9NVzeLSPtVkEKZUndPTAL3mdmVqWzc3Z8lNCAnLvtuwuM5wDGpbCtWB55C57fuZ0LeXN5cc0jc0YiIpE0qZwprzewiM8uPpouA3BpPeugkKCjik/lvsava2VFVHXdEIiJpkUpS+ByhO+pKYAVwLmHoi9xR2Ak6dOKc/FcA57RbVYUkIu1TKr2Plrj7FHcvdffe7n4m4UK23LJfT7qwjYNsKQvVC0lE2qm9Hent660aRTa49BkAvlLwB6prnFUbt8cckIhI69vbpJDswrT2rbgP9D+c4R0rADQWkoi0S3ubFDKr22hbGTmFA6s+YJCtprxiG9W6I5uItDONJgUz22RmG5NMmwjXLOSe0WcBcHHXt3Bg9Pf+Em88IiKtrNGk4O7F7t41yVTs7qlc39D+9BgEZRP4fPe3ANi+S1c3i0j7oluKtdSYc7BV7zOyYAUAp9+mQfJEpP1QUmip0WcCxh8+Fsb2+/fKTfHGIyLSipQUWqq4Dwz7OEWzH6FjvrOr2jn7F/+MOyoRkVahpLA3DvssbCxnaq/5ALy3bEPMAYmItI5Uhs5O1gtpaTSc9tC2CDLjjDgNOvXi2v4zMNDZgoi0G6mcKdwM/Bdh2Owy4JvArwn3XL4nfaFlsIJCKCiCuU/zsX6hB5LOFkSkPUglKUx291+5+yZ33+judwGnuvsjQI80x5e5Ln4ScO6v+U7d2cKZd2igPBHJbqkkhRozO8/M8qLpvIR1uXtJb+mB0LErbF7JoWVdAXh/2caYgxIR2TepJIULgYuB1dF0MXCRme1HuLNa7jr1J1C1nSdP3kFhvlFV44z6rq5yFpHslcrQ2Qvc/XR3L4mm0919vrtvc/fcri8ZfRYU94XXbuOt734SgK07qznvlxosT0SyUyq9j8qinkarzWyVmf3ezMraIriMV1AI+YWw4CW6VMylqCAczumLKmIOTERk76RSfXQv8BRhELz+wJ+iZQLwxZfB8uC3ZzP3B5OB0NBy1h3qoioi2SeVpFDq7ve6e1U03QeUpjmu7LFfDzjiMthWgW1czqFl3QB4e2kl7rnbDi8i2SmVpLDWzC4ys/xoughYl8rGzWyymc0zs/lmdnWS9T8zs3ei6QMzq2zpC8gIE78MNVVw94n88Ypj6+5AdND/qtFZRLJLKknhc8B5wEpgBXAucGlzTzKzfOAO4BRgFDDVzEYllnH3q9x9rLuPBW4D
nmhZ+BmixyDosj9sWgmVSzlicLh8Y0dVjXojiUhWSaX30RJ3n+Lupe7e293PBM5OYdsTgPlR76WdhCugz2ii/FTgoZSizkRf+DvkFcCrP+XRy4+mc8d8IPRG0q07RSRb7O2AeF9PoUx/YGnCfHm0bA9mNggYArzQyPppZjbDzGasWbOmpbG2je4DoFMJzLwPKhYx+/uT6w7ujMUV6qYqIllhb5OCNV8kaZnGWl4vAB539+pkK939Lncf7+7jS0szuI172gthTKS/fQ+AI4b0rDsIby6q4PxfvRZfbCIiKdjbpJBKt5pyYEDCfBmwvJGyF5DNVUe1uvaDzqUw50lY9A8e+eJEFvz41PrEsHB9rOGJiDSn0aTQyJDZG81sE+GaheZMB4ab2RAzKyR88T+VZD8jCAPrtY+f0V9+A/I7wu/Og5pqzIzxUcOzA0Ovfibe+EREmtBoUnD3YnfvmmQqdveC5jbs7lWEsZGeA+YCj7r7bDO7zsymJBSdCjzs7aVTf2EnOOuXsGsL3H4EAI9dfjRdoobnGpQYRCRzWbZ9F48fP95nzJgRdxhNc4cbB8GOzfD1OeEWnsCY7/2FzTtCs4kBC284LcYgRSSXmNlMdx/fXDndjjMdzOCyF8Fr4M5j6ha///3JdWcMDgy++hkOvva5mIIUEdmTkkK69BoG3QbA1rUw9091i99P6KoKsGl7FWO+pwvcRCQzKCmk01dmQmFneOyScLVzZMENpzFhcP1N6zbvqFY7g4hkBCWFdCoohMteCtVIdxwV2hoij15+NEcO6Vk3X0OoTtK1DCISJyWFdCs9EE65CbZXwC1jd1v1yBcn7pYYAN5YuF7JQURio6TQFo74AnTqBZWLYMHLu6165IsTWXTDaXu8EW8sXK+2BhFpc0oKbcEMvjYLCvaDB86GDeV7FFlww2l7nDVs3lGtHkoi0qZ0nUJbWvsh3D4+3Kntf5ZDh/2SFht69TPUJFleXFTAe9eenN4YRaRd0nUKmahkOJSODA3PN4+CmqTj/yU9a4DQfVVnDiKSTjpTiMMt46BiARx+KXzqZ6F6qRHDrnmG6kbeojzCSKyPfHFieuIUkXYj1TMFJYW43DwaNpaHC9yuer/Z4gdf+xybtlc1uj7f4KMfa9gMEUlOSSHTucNPR8DmVSknBmg+OUBIEOMH6wxCROopKWSDmpqQGLasblFiADj/V6/xRor3Z1ADtYgoKWSLxMTQtSwkhibaGJJpqt0hmSPVDiGSc5QUsklNDdw8EjavDHduu2pOGCKjhc7/1WvMWLS+RQkC1B4hkguUFLKNO7z6E3jhh2D58N+LoKjrXm+uJdVLyajKSaR9UVLIVm8/AH/8crjA7T/fCGMntYJUGqhToaonkeykpJDNFv0D7ouqcy54EA5q/aqd1koSiZQwRDKXkkK221AOPz84XP18zJXw8W/vVTtDS7S0wbqlVCUlEh8lhfZg13a45dDQAF3YBaa9FIbKaEPpOKNojpKHSOtTUmhP5v4JHrkoPD7lJjjiMsiLd9iqfW3Ibk1KIiLNy4ikYGaTgVuAfOBud78hSZnzgGsJ97Kf5e6faWqbOZkUADaugF8cBdsrQ++k/3wNSkfEHVWjMilppELtIdLexZ4UzCwf+AA4CSgHpgNT3X1OQpnhwKPACe5eYWa93X11U9vN2aQAodvqrIfgyS+F+aO/Asf/N3QsjjeufbC311ZkEyUcyQSZkBQmAte6+8nR/DUA7v7jhDI3AR+4+92pbjenk0KtLWvhzqPDuEkA5/wGxpzT4iuhs1EcbRzZTtVrApmRFM4FJrv7F6L5i4Ej3f2KhDJPEs4mjiFUMV3r7nvcg9LMpgHTAAYOHHj44sWL0xJz1imfAb85KfRQsvzQEN33kLijynhKLNKc9niVfyYkhU8DJzdIChPc/SsJZZ4GdgHnAWXAq8AYd69sbLs6U2igphreuh+evjLMdyqFL/wVeg6NN64ckm3tJ5K9RvYp5s9XHrdX
z001KRTs1dZTUw4MSJgvA5YnKfO6u+8CFprZPGA4of1BUpGXD+MvhdFnwZ3HhHs03DoOuvSFL/wNug9ofhuyTzKtvUBJqv2q3LYr7ftIZ1KYDgw3syHAMuACoGHPoieBqcB9ZlYCHAgsSGNM7dd+3eHrs2HTSnj5RphxD/x8DIy7GI69CnoNiztCaSOZlqTaWrovwoxTWY/k93VvTenuknoq8HNCe8E97n69mV0HzHD3p8zMgJ8Ck4Fq4Hp3f7ipbar6KEWVS0N7w6YVYb5TLzj3XhhyXE40SIvI7mJvU0gXJYUW2rQKXv8F/PMWwMNAe5NvgEMvgKJucUcnIm1ESUF2t2sbzP5DGIHVa8Kywy+FI74AfcbEG5uIpJ2SgjRu2Vvw4Pnhbm8QurOefReMnJL2QfdEJB6Z0PtIMlX/w+C/PoSt6+Gd38Ff/xd+//mw7mPfgLEXqmFaJEfpTEHC7UA/egGeuAy2RV0ZO3aFT/4QRk2B/XrEG5+I7DNVH8ne2bAM3n0EXvhBfdvDfj3g5B/BiFND11cRyTpKCrJv3GH5WzD7SfjXrfXLh58cLpQbcYoShEgWUVKQ1uMeGqdnPwGv3V6//MDJ9QlC3VtFMpqSgqSHOyybGbq3JiaIoh4w6WoYfpIaqUUykJKCpF9NTX2CeOPO+jaIgiI47LMw/JMw+FjokP5L80WkaUoK0vbWL4T5f4e/XA01CUNTH/AJOOAknUWIxEhJQeK1azss/id8+LdwFlGr9ixi2Ikw6Ggo6hpfjCI5RElBMkvtWcSHf4MPn6tfXlgMEy6DocfDgCNV1SSSJkoKkrl2bYfy6bDwFXj1p+DV9essPzRYD/049BsH+broXqQ1KClI9tixCZa8DgtfhtfuqG+wBijqDhOvgEETof/hOpMQ2UtKCpK9tqwLCWLRP2DmvbsniY7FMP5zUHZEmIr7xBenSBZRUpD2Y+t6WPomLPlXuKPcjk27rx99VkgQ/cdD30OhQ1E8cYpkMCUFab92bYeV74Z2iVdvhq1rd19veXDEZVA2PrRL9BwGeXnxxCqSIZQUJLdsWgnlM0Ki+NdtuzdeQ2jAPvKL0Hcs9BsLvQ6AvPx4YhWJgZKC5LbqKljzb1jxDix/B6b/es8ylg9WV28NAAAM9ElEQVQTpoUk0XcslAxXopB2S0lBpKHqKlg7LySJFe/Am3clL1fcF078XkgWJQcqUUi7oKQgkorqKlj7Qf0ZxZu/Sl6uS1/4+NXQ52DoPUpdYyXrZERSMLPJwC1APnC3u9/QYP0lwP8By6JFt7v73U1tU0lB0q6mOkoUs0KiSBymI9HBnw5Jos/B0OcQ6FzStnGKtEDsScHM8oEPgJOAcmA6MNXd5ySUuQQY7+5XpLpdJQWJhTtULoYV78LK92D63fW3Lk1U1B0Ouxh6j4b9R0PpCCjo2PbxijSQalJI5xgCE4D57r4gCuhh4AxgTpPPEslEZtBjcJhGTYETvh2Wb10fksTKd+Gft8KW1aH3U0Ojz4b9R4Wqp96joPsgdZOVjJTOpNAfWJowXw4cmaTcOWZ2HOGs4ip3X9qwgJlNA6YBDBw4MA2hiuylTj3DYH5Dj4ejvxKWVVfB+o9g1fuwag6smh3uWjf7id2fa/kw7sLorGJU+Nu5V9u/BpEE6UwKlmRZw7qqPwEPufsOM7sc+H/ACXs8yf0u4C4I1UetHahIq8ovCNVGpSNgzDn1y7dvDN1kV88JyWL63fDW/Xs+v6gbHDo12sZBUDJCyULaTDqTQjkwIGG+DFieWMDd1yXM/hq4MY3xiMSrqCsMmBAmgFNvCm0Vm1fD6tkhUayeC7MehDd+uefzBx0TusjWJpySEdC1X6jaEmkl6UwK04HhZjaE0LvoAuAziQXMrK+7r4hmpwBz0xiPSOYxg+L9wzQsOkk+845wq9ON5bBmXpjWzoPZT4YbF+2xjTwYc25IGCUHQK/h4Q536jYreyFtScHdq8zsCuA5QpfUe9x9tpld
B8xw96eAr5rZFKAKWA9ckq54RLJKXh50Hxim4SeFZVNuC2cWW9bUJ4q1H8Ksh+G9R5Nvp6g7HHJeSBS1CaNrfzVyS6N08ZpIe7FzC6z7CNZ9CGvnh7/vPdZ4+VFnhqE9eg0PY0GVHBDaM6Rdiv06hXRRUhBpIXfYvCqcVdQmjFkPwraK5OUtH8ZODYmi59AwymzPIVDYuW3jllalpCAizavaCRUL6xPGuo/g7d82Xn7QMSFB9BwWJYxo6til7WKWvZIJF6+JSKYrKKzvzVTrjNvD3+0bYf2C3adZDydv7AYYOLH+rKJXQtLoWJz+1yGtRmcKItJyOzbB+oXhIr31C2DdApj10J73sahl+XDI+dBr6O5nGGrDaDOqPhKReOzYHKqk1n1Uf4bxzoNNJ4wxZ0OPIVGyGBIed+mtazBakZKCiGSenVuiM4wF0VnGQnj7gcYTBsBBnwqJovugMPUYBN0GQGGntou7HVCbgohknsLO0GdMmGpNuTX8rdoJlUvCWcb6hdHfBfDvpxvfnuXD6DPDQIXdB0WDFg6CrmVhuBFpMR01EckMBYXhWomSA/ZcV1MTRqCtXAIVi6FyUfg79yl4//eNb7N20MG6hDE4PO5coqqpRqj6SESyX3UVbFwW7nlRsSgkjIpF8P7jTT9vxKn1VVI9BtdfRd4Oe0yp+khEckd+QfTFPgiGHFe//NzfhL87t0RnGYvqE0blYpj3bNPbHXl61JYxcPepHSaNWkoKItL+FXaG3iPD1JA7bF0XJYol9dOM38DcPzW93XaYNFR9JCLSFHfYsjZKFot3TxrNGTklShSJiWNALElDXVJFRNpC7ci1eySNe5p/bqde4YZKbZA0lBRERDLBviaNsZ+BbgOhW1m4bWuPwXsVhpKCiEg2SJY0KhaHW7U2vKivaxl8ffZe7Ua9j0REsoFZGNKjS28oS/jOPv3n4W9NDWxdCxuWwn490h6OkoKISCbLy6tPGm2xuzbZi4iIZAUlBRERqaOkICIidZQURESkTlqTgplNNrN5ZjbfzK5uoty5ZuZm1mx3KRERSZ+0JQUzywfuAE4BRgFTzWxUknLFwFeBN9IVi4iIpCadZwoTgPnuvsDddwIPA2ckKfcD4CZgexpjERGRFKQzKfQHlibMl0fL6pjZOGCAuzdxayUREWkr6bx4LdltjerG1DCzPOBnwCXNbshsGjAtmt1sZvP2MqYSYO1ePre907FpnI5N43RsGpdpx2ZQKoXSmRTKgQEJ82XA8oT5YmAM8JKF2+L1AZ4ysynuvtvgRu5+F3DXvgZkZjNSGfsjF+nYNE7HpnE6No3L1mOTzuqj6cBwMxtiZoXABcBTtSvdfYO7l7j7YHcfDLwO7JEQRESk7aQtKbh7FXAF8BwwF3jU3Web2XVmNiVd+xURkb2X1gHx3P1Z4NkGy77bSNlJ6Ywlss9VUO2Yjk3jdGwap2PTuKw8Nll3PwUREUkfDXMhIiJ1lBRERKROziSFVMdhaq/MbICZvWhmc81stpl9LVre08z+ZmYfRn97RMvNzG6Njte7ZnZYvK8gvcws38zeNrOno/khZvZGdFweiXrQYWYdo/n50frBccadbmbW3cweN7N/R5+difrMBGZ2VfS/9L6ZPWRmRe3hc5MTSSHVcZjauSrgG+4+EjgK+HJ0DK4Gnnf34cDz0TyEYzU8mqYBd7Z9yG3qa4RecrVuBH4WHZcK4PPR8s8DFe5+AOHiyxvbNMq2dwvwF3c/CDiUcIxy/jNjZv0JY7aNd/cxQD6h2332f27cvd1PwETguYT5a4Br4o4r5mPyR+AkYB7QN1rWF5gXPf4VMDWhfF259jYRLqx8HjgBeJpwNf5aoKDh54fQxXpi9LggKmdxv4Y0HZeuwMKGr0+fGYf6YXx6Rp+Dp4GT28PnJifOFEhhHKZcEp26jiOMTLu/u68AiP7W3gg2l47Zz4FvATXRfC+g0sO1NrD7a687LtH6DVH59mgosAa4N6pau9vM
OqPPDO6+DPgJsARYQfgczKQdfG5yJSk0OQ5TLjGzLsDvgSvdfWNTRZMsa3fHzMw+Bax295mJi5MU9RTWtTcFwGHAne4+DthCfVVRMjlzbKJ2lDOAIUA/oDOh+qyhrPvc5EpSaG4cppxgZh0ICeF37v5EtHiVmfWN1vcFVkfLc+WYHQNMMbNFhOHdTyCcOXQ3s9qLOxNfe91xidZ3A9a3ZcBtqBwod/fae508TkgSuf6ZAfgEsNDd17j7LuAJ4GjawecmV5JCk+Mw5QILow7+Bpjr7jcnrHoK+I/o8X8Q2hpql3826lFyFLChtsqgPXH3a9y9zMP4WxcAL7j7hcCLwLlRsYbHpfZ4nRuVz8hffPvK3VcCS81sRLToRGAOOf6ZiSwBjjKzTtH/Vu2xyf7PTdyNGm3YMHQq8AHwEfDtuOOJ4fUfSzhdfRd4J5pOJdRrPg98GP3tGZU3Qo+tj4D3CL0sYn8daT5Gk4Cno8dDgTeB+cBjQMdoeVE0Pz9aPzTuuNN8TMYCM6LPzZNAD31m6o7N94F/A+8DvwU6tofPjYa5EBGROrlSfSQiIilQUhARkTpKCiIiUkdJQURE6igpiIhIHSUFERGpo6QgkgIzG2tmpybMT2mtIdjN7Eoz69Qa2xLZV7pOQSQFZnYJ4WKsK9Kw7UXRtte24Dn57l7d2rGI6ExB2hUzGxzdDObX0Q1Q/mpm+zVSdpiZ/cXMZprZq2Z2ULT809GNU2aZ2SvR0CjXAeeb2Ttmdr6ZXWJmt0fl7zOzOy3cxGiBmR1vZvdEcdyXsL87zWxGFNf3o2VfJQyo9qKZvRgtm2pm70Ux3Jjw/M1mdp2ZvQFMNLMbzGxOdEObn6TniErOifuSak2aWnMCBhNuKDQ2mn8UuKiRss8Dw6PHRxLGo4EwREP/6HH36O8lwO0Jz62bB+4jDKZnhJEzNwIHE350zUyIpXY4iHzgJeCQaH4RUBI97kcYV6eUMErpC8CZ0ToHzqvdFuF+BZYYpyZN+zrpTEHao4Xu/k70eCYhUewmGkL8aOAxM3uHcIOYvtHqfwL3mdllhC/wVPzJ3Z2QUFa5+3vuXgPMTtj/eWb2FvA2MJpwF8CGjgBe8jD6ZhXwO+C4aF01YZRbCIlnO3C3mZ0NbE0xTpEmFTRfRCTr7Eh4XA0kqz7KI9wQZWzDFe5+uZkdCZwGvGNme5RpYp81DfZfAxSY2RDgm8AR7l4RVSsVJdlOsnH3a233qB3B3avMbAJhdM4LgCsIw36L7BOdKUhO8nCDoYVm9mmou+n8odHjYe7+hrt/l3DbxAHAJqB4H3bZlXCTmg1mtj+735AlcdtvAMebWUl0b/GpwMsNNxad6XRz92eBKwmjmYrsM50pSC67ELjTzL4DdCC0C8wC/s/MhhN+tT8fLVsCXB1VNf24pTty91lm9jahOmkBoYqq1l3An81shbt/3MyuIYzLb8Cz7v7HPbdIMfBHMyuKyl3V0phEklGXVBERqaPqIxERqaPqI2n3zOwOwr2YE93i7vfGEY9IJlP1kYiI1FH1kYiI1FFSEBGROkoKIiJSR0lBRETq/H9QcoKkXOQ6gwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1bccea6e828>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cvresult = pd.DataFrame.from_csv('2_nestimators.csv')\n",
    "        \n",
    "# plot\n",
    "test_means = cvresult['test-mlogloss-mean']\n",
    "test_stds = cvresult['test-mlogloss-std'] \n",
    "        \n",
    "train_means = cvresult['train-mlogloss-mean']\n",
    "train_stds = cvresult['train-mlogloss-std'] \n",
    "\n",
    "x_axis = range(0, cvresult.shape[0])\n",
    "        \n",
    "pyplot.errorbar(x_axis, test_means, yerr=test_stds ,label='Test')\n",
    "pyplot.errorbar(x_axis, train_means, yerr=train_stds ,label='Train')\n",
    "pyplot.title(\"XGBoost n_estimators vs Log Loss\")\n",
    "pyplot.xlabel( 'n_estimators' )\n",
    "pyplot.ylabel( 'Log Loss' )\n",
    "pyplot.savefig( 'n_estimators2.png' )\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 保存模型，供测试使用"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "cPickle是python2的库，python3改成了pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存模型\n",
    "import pickle\n",
    "pickle.dump(xgb4, open(\"xgb_model.pkl\", 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of train is: 0.4531389130494976\n"
     ]
    }
   ],
   "source": [
    "#保存数据\n",
    "import pickle\n",
    "\n",
    "xgb = pickle.load(open(\"xgb_model.pkl\", 'rb'))\n",
    "\n",
    "train_predprob = xgb.predict_proba(train_x)\n",
    "logloss = log_loss(train_y, train_predprob)\n",
    "\n",
    "#Print model report:\n",
    "print ('logloss of train is:', logloss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3100</td>\n",
       "      <td>1550.0</td>\n",
       "      <td>1033.333333</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>6000</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>2000.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2400</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>2400.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2825</td>\n",
       "      <td>1412.5</td>\n",
       "      <td>941.666667</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>16</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2700</td>\n",
       "      <td>1350.0</td>\n",
       "      <td>900.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 227 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0        1.0         2   3100           1550.0     1033.333333       -1.0   \n",
       "1        2.0         2   6000           2000.0     2000.000000        0.0   \n",
       "2        1.0         0   2400           1200.0     2400.000000        1.0   \n",
       "3        1.0         2   2825           1412.5      941.666667       -1.0   \n",
       "4        1.0         2   2700           1350.0      900.000000       -1.0   \n",
       "\n",
       "   room_num  Year  Month  Day  ...   virtual  walk  walls  war  washer  water  \\\n",
       "0       3.0  2016      4    6  ...         0     0      0    0       0      0   \n",
       "1       4.0  2016      5    3  ...         0     0      0    0       0      0   \n",
       "2       1.0  2016      6   28  ...         0     0      0    0       0      0   \n",
       "3       3.0  2016      4   16  ...         0     0      0    0       0      0   \n",
       "4       3.0  2016      6    3  ...         0     0      0    0       0      0   \n",
       "\n",
       "   wheelchair  wifi  windows  work  \n",
       "0           0     0        0     0  \n",
       "1           0     0        0     0  \n",
       "2           0     0        0     0  \n",
       "3           0     0        0     0  \n",
       "4           0     0        0     0  \n",
       "\n",
       "[5 rows x 227 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the held-out test set from disk\n",
    "dtest = pd.read_csv(\"test_data.csv\")\n",
    "\n",
    "# Split into the feature matrix and the target column\n",
    "test_x = dtest.drop(columns=[\"interest_level\"])\n",
    "test_y = dtest[\"interest_level\"]\n",
    "\n",
    "# Preview the first rows of the feature matrix\n",
    "test_x.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of test is: 0.5864387755589302\n"
     ]
    }
   ],
   "source": [
    "# Predicted probabilities of belonging to each of the three tiers (high / medium / low).\n",
    "# NOTE(review): `xgb` must be a fitted classifier here, which shadows the\n",
    "# `import xgboost as xgb` module alias -- presumably rebound in an earlier cell; confirm.\n",
    "y_test_pred = xgb.predict_proba(test_x)\n",
    "\n",
    "# Log loss of the predicted probabilities against the true test labels\n",
    "logloss_pred = log_loss(test_y, y_test_pred)\n",
    "print('logloss of test is:', logloss_pred)\n",
    "\n",
    "# NOTE(review): the column order assumes the model's classes are ordered\n",
    "# high, medium, low -- verify against the label encoding used for training.\n",
    "out_df1 = pd.DataFrame(y_test_pred, columns=[\"high\", \"medium\", \"low\"])\n",
    "\n",
    "# Write the probability table to CSV without the row index\n",
    "out_df = out_df1.copy()\n",
    "out_df.to_csv(\"xgb_Rent.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>high</th>\n",
       "      <th>medium</th>\n",
       "      <th>low</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.070582</td>\n",
       "      <td>0.364570</td>\n",
       "      <td>0.564849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.002433</td>\n",
       "      <td>0.073354</td>\n",
       "      <td>0.924213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.088436</td>\n",
       "      <td>0.326452</td>\n",
       "      <td>0.585112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.093584</td>\n",
       "      <td>0.387353</td>\n",
       "      <td>0.519064</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.145465</td>\n",
       "      <td>0.467163</td>\n",
       "      <td>0.387372</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       high    medium       low\n",
       "0  0.070582  0.364570  0.564849\n",
       "1  0.002433  0.073354  0.924213\n",
       "2  0.088436  0.326452  0.585112\n",
       "3  0.093584  0.387353  0.519064\n",
       "4  0.145465  0.467163  0.387372"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the predicted class probabilities\n",
    "out_df1.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
